Step1: Compute AUROC and ACC Row Sums and Median values under different weights

We identify the top methods for network structure prediction by computing the area under the receiver operating characteristic (AUROC) and accuracy (ACC) row sum and median values for each of the 28 networks. We weight these variables at 5 different ranges:

for (sim in (1:28)){
  AUC<-read.csv(paste0("sim",sim,"/Combined_Data_onlyAUC.csv"),row.names = 1) 
  ACC<-read.csv(paste0("sim",sim,"/Combined_Data_onlyACC.csv"),row.names = 1) 

  #calculate AUC only sums and medians
  AUC_sum<-cbind(AUC$Method_name,rowSums(AUC[,-1])) %>% as.data.frame()
  colnames(AUC_sum)<-c("Method_name","Sum_of_avg_across_patients")
  AUC_sum$Sum_of_avg_across_patients<-as.numeric(AUC_sum$Sum_of_avg_across_patients)

  AUC_median<-cbind(AUC$Method_name,apply(AUC[-1], 1, median)) %>% as.data.frame()
  colnames(AUC_median)<-c("Method_name","Median_across_patients")

  write.csv(AUC_sum,paste0('sim',sim,'/Combined_data_sums_onlyAUC.csv'),quote=F)
  write.csv(AUC_median,paste0('sim',sim,'/Combined_data_median_onlyAUC.csv'),quote=F)

  #calculate ACC only sums and medians
  ACC_sum<-cbind(ACC$Method_name,rowSums(ACC[,-1])) %>% as.data.frame()
  colnames(ACC_sum)<-c("Method_name","Sum_of_avg_across_patients")
  ACC_sum$Sum_of_avg_across_patients<-as.numeric(ACC_sum$Sum_of_avg_across_patients)


  ACC_median<-cbind(ACC$Method_name,apply(ACC[-1], 1, median)) %>% as.data.frame()
  colnames(ACC_median)<-c("Method_name","Median_across_patients")

  write.csv(ACC_sum,paste0('sim',sim,'/Combined_data_sums_onlyACC.csv'),quote=F)
  write.csv(ACC_median,paste0('sim',sim,'/Combined_data_median_onlyACC.csv'),quote=F)

  #  calculate equal weights sums and medians
  AUC_half<-cbind(sapply(AUC[,-1],"*",0.5))
  ACC_half<-cbind(sapply(ACC[,-1],"*",0.5))
  equal<-cbind( AUC_half + ACC_half)


  equal_sum<-cbind(AUC$Method_name,rowSums(equal)) %>% as.data.frame()
  colnames(equal_sum)<-c("Method_name","Sum_of_avg_across_patients")
  equal_sum$Sum_of_avg_across_patients<-as.numeric(equal_sum$Sum_of_avg_across_patients)

  equal_median<-cbind(AUC$Method_name,apply(equal, 1, median)) %>% as.data.frame()
  colnames(equal_median)<-c("Method_name","Median_across_patients")


  write.csv(equal,paste0('sim',sim,'/Combined_data_equal.csv'),quote=F)
  write.csv(equal_sum,paste0('sim',sim,'/Combined_data_sums_equal.csv'),quote=F)
  write.csv(equal_median,paste0('sim',sim,'/Combined_data_median_equal.csv'),quote=F)


  #calculate 0.3 AUROC and 0.7 ACC weights sums and medians
  AUC_low<-cbind(sapply(AUC[,-1],"*",0.3))
  ACC_high<-cbind(sapply(ACC[,-1],"*",0.7))
  three_seven<-cbind( AUC_low + ACC_high)


  three_seven_sum<-cbind(AUC$Method_name,rowSums(three_seven)) %>% as.data.frame()
  colnames(three_seven_sum)<-c("Method_name","Sum_of_avg_across_patients")
  three_seven_sum$Sum_of_avg_across_patients<-as.numeric(three_seven_sum$Sum_of_avg_across_patients)

  three_seven_median<-cbind(AUC$Method_name,apply(three_seven, 1, median)) %>% as.data.frame()
  colnames(three_seven_median)<-c("Method_name","Median_across_patients")

  write.csv(three_seven,paste0('sim',sim,'/Combined_data_37.csv'),quote=F)
  write.csv(three_seven_sum,paste0('sim',sim,'/Combined_data_sums_37.csv'),quote=F)
  write.csv(three_seven_median,paste0('sim',sim,'/Combined_data_median_37.csv'),quote=F)

  #calculate 0.7 AUROC and 0.3 ACC weights sums and medianssums and medians}
  AUC_high<-cbind(sapply(AUC[,-1],"*",0.7))
  ACC_low<-cbind(sapply(ACC[,-1],"*",0.3))
  seven_three<-cbind( AUC_low + ACC_high)

  seven_three_sum<-cbind(AUC$Method_name,rowSums(seven_three)) %>% as.data.frame()
  colnames(seven_three_sum)<-c("Method_name","Sum_of_avg_across_patients")
  seven_three_sum$Sum_of_avg_across_patients<-as.numeric(seven_three_sum$Sum_of_avg_across_patients)


  seven_three_median<-cbind(AUC$Method_name,apply(seven_three, 1, median)) %>% as.data.frame()
  colnames(seven_three_median)<-c("Method_name","Median_across_patients")

  write.csv(seven_three,paste0('sim',sim,'/Combined_data_73.csv'),quote=F)
  write.csv(seven_three_sum,paste0('sim',sim,'/Combined_data_sums_73.csv'),quote=F)
  write.csv(seven_three_median,paste0('sim',sim,'/Combined_data_median_73.csv'),quote=F)
}

Step2: Compute Average Row Sums and Median values under different weights

Next, we compute the average Row Sums and Median values under the 5 AUROC and ACC weights.

patterns<-c("Combined_data_sums_onlyAUC","Combined_data_sums_onlyACC","Combined_data_sums_equal","Combined_data_sums_37","Combined_data_sums_73")

dir.create("averages")

for (pattern in patterns){
  count_files <- list.files(recursive=TRUE,pattern=paste0("^",pattern,".csv"),full.names=TRUE)
  d1<-data.frame()
  for (i in count_files) {
    # read file i as a data frame
    f <- read.csv(i,row.names = 1)
    if(length(d1) == 0){
      d1 <- f
    }else
    {
     d1$Sum_of_avg_across_patients<-d1$Sum_of_avg_across_patients+f$Sum_of_avg_across_patients
    }
  }
    d1$Sum_of_avg_across_patients<-d1$Sum_of_avg_across_patients/28
    d1$Sum_of_avg_across_patients<-d1$Sum_of_avg_across_patients/50
    d1<- d1 %>% arrange(desc(Sum_of_avg_across_patients))
    write.csv(d1,paste0("averages/Average_",pattern,".csv"),quote=F)
}
patterns<-c("Combined_data_median_onlyAUC","Combined_data_median_onlyACC","Combined_data_median_equal","Combined_data_median_37","Combined_data_median_73")

for (pattern in patterns){
  count_files <- list.files(recursive=TRUE,pattern=paste0("^",pattern,".csv"),full.names=TRUE)
  count_files<-count_files[!grepl("all", count_files)]
  d1<-data.frame()
  for (i in count_files) {
    # read file i as a data frame
    f <- read.csv(i,row.names = 1)
    if(length(d1) == 0){
      d1 <- f
    }else
    {
     d1$Median_across_patients<-d1$Median_across_patients+f$Median_across_patients
    }
  }
    d1$Median_across_patients<-d1$Median_across_patients/28
    d1<- d1 %>% arrange(desc(Median_across_patients))
    write.csv(d1,paste0("averages/Average_Median_",pattern,".csv"),quote=F)
}
rm(list = ls())

Step3: Generate heatmaps of Average Row Sums and Median values under different weights

To identify the top performing network methods across all 28 networks and under different weights of the AUROC and ACC, we generate a heatmap with hierarchical clustering

pal<-rev(c("#FF0000","#000000","#031e33","#0b5394","#38761d","#ffe700","#FFFFFF"))
count_files <- list.files(path="averages",recursive=TRUE,pattern=paste0("Average_Combined"),full.names=TRUE)

d1<-data.frame()
for (i in count_files){
  f<-read.csv(i,row.names = 1)

  if(length(d1) == 0){
    d1<-f
  }
  else{
    d1<- merge(d1, f, by.x = "Method_name", by.y = "Method_name")
  }

  }


  rownames(d1)<-d1$Method_name
  colnames(d1)<-c("Method_name","AUC=0.3,ACC=0.7","AUC=0.7,ACC=0.3","AUC=0.5,ACC=0.5","AUC=0,ACC=1","AUC=1,ACC=0")
  d1<- d1 %>% select(-Method_name)
  heatmaply(d1,colors = pal,show_dendrogram = c(TRUE, FALSE),labCol=c("Weight<sub>AUC=0.3</sub>,Weight<sub>ACC=0.7</sub>","Weight<sub>AUC=0.7</sub>,Weight<sub>ACC=0.3</sub>","Weight<sub>AUC=0.5</sub>,Weight<sub>ACC=0.5</sub>","Weight<sub>AUC=0</sub>,Weight<sub>ACC=1</sub>","Weight<sub>AUC=1</sub>,Weight<sub>ACC=0</sub>"),main="Average Row Sums Value across 28 Networks",file = "heatmap_rowsums.pdf",width=800,height=1000,margins = c(40, 50)) %>%
layout(height = 900)





















patterns<-c("Combined_data_median_onlyAUC","Combined_data_median_onlyACC","Combined_data_median_equal","Combined_data_median_37","Combined_data_median_73")

count_files <- list.files(path="averages",recursive=TRUE,pattern=paste0("Average_Median"),full.names=TRUE)

d1<-data.frame()
for (i in count_files){
  f<-read.csv(i,row.names = 1)

  if(length(d1) == 0){
    d1<-f
  }
  else{
    d1<- merge(d1, f, by.x = "Method_name", by.y = "Method_name")
  }

  }


  rownames(d1)<-d1$Method_name
  colnames(d1)<-c("Method_name","AUC=0.3,ACC=0.7","AUC=0.7,ACC=0.3","AUC=0.5,ACC=0.5","AUC=0,ACC=1","AUC=1,ACC=0")
  d1<- d1 %>% select(-Method_name)
  heatmaply(d1,colors = pal,show_dendrogram = c(TRUE, FALSE),labCol=c("Weight<sub>AUC=0.3</sub>,Weight<sub>ACC=0.7</sub>","Weight<sub>AUC=0.7</sub>,Weight<sub>ACC=0.3</sub>","Weight<sub>AUC=0.5</sub>,Weight<sub>ACC=0.5</sub>","Weight<sub>AUC=0</sub>,Weight<sub>ACC=1</sub>","Weight<sub>AUC=1</sub>,Weight<sub>ACC=0</sub>"),main="Average Median Value across 28 Networks",file = "heatmap_medians.pdf",width=800,height=1000,margins = c(40, 50))  %>%
  layout(height = 900)





















Step4: Generate line plots of Average Row Sums and Median AUROC and ACC values under different weights

Using our heatmaps, we see that the top combined methods of network structure prediction are BF_PFmean, T_BCmean, T_BF_BCmean, T_BF_PF_BCmean, T_BF_PFmean, T_BFmean, T_PF_BCmean, T_PFmean. We can see how these compare to the individual methods (Tigress, BCorrU, BCohF, and PCohF) in all 28 networks individually using line plots

create_rowsum_lineplot<-function(pattern,methods){
  count_files <- list.files(recursive=T,pattern=paste0("^",pattern),full.names=TRUE)
  d1<-data.frame()
  for (i in count_files) {
    f <- read.csv(i,row.names = 1) %>% filter(Method_name %in% methods)
    f<- cbind(f,c(rep(unlist(strsplit(i,'/'))[2],nrow(f))))
    colnames(f)[3]<-c('time_series')

    #if the counts object is empty just copy the f to m
    if(length(d1) == 0){
      d1 <- f

    } else
    {
      #if the dataframe is not empty then merge the data
      d1 <- rbind(d1,f)
    }
  }

  d1$time_series<-gsub("sim","",d1$time_series)
  d1$time_series<-str_pad(d1$time_series, 2, pad = "0")
  colnames(d1)<-c("Method","Row Sum","Network")
  d1$`Row Sum`<-d1$`Row Sum`/50

  return(d1)
}
create_median_lineplot<-function(pattern,methods){
  count_files <- list.files(recursive=T,pattern=paste0("^",pattern),full.names=TRUE)
  d1<-data.frame()
  for (i in count_files) {
    f <- read.csv(i,row.names = 1) %>% filter(Method_name %in% methods)
    f<- cbind(f,c(rep(unlist(strsplit(i,'/'))[2],nrow(f))))
    colnames(f)[3]<-c('time_series')

    #if the counts object is empty just copy the f to m
    if(length(d1) == 0){
      d1 <- f

    } else
    {
      #if the dataframe is not empty then merge the data
      d1 <- rbind(d1,f)
    }
  }

  d1$time_series<-gsub("sim","",d1$time_series)
  d1$time_series<-str_pad(d1$time_series, 2, pad = "0")
  colnames(d1)<-c("Method","Median","Network")

  return(d1)
}

We can also plot all 26 combined and individual methods on an interactive line plot